{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "## 读取和准备数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from matplotlib import pyplot as plt\n",
    "%matplotlib inline"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-05T05:41:25.907252200Z",
     "start_time": "2023-12-05T05:41:25.896266800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "data": {
      "text/plain": "11914"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('F:/机器学习数据集/cardataset/data.csv')\n",
    "len(df)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-05T05:45:31.957403700Z",
     "start_time": "2023-12-05T05:45:31.321425900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "data": {
      "text/plain": "  Make       Model  Year             Engine Fuel Type  Engine HP  \\\n0  BMW  1 Series M  2011  premium unleaded (required)      335.0   \n1  BMW    1 Series  2011  premium unleaded (required)      300.0   \n2  BMW    1 Series  2011  premium unleaded (required)      300.0   \n3  BMW    1 Series  2011  premium unleaded (required)      230.0   \n4  BMW    1 Series  2011  premium unleaded (required)      230.0   \n\n   Engine Cylinders Transmission Type     Driven_Wheels  Number of Doors  \\\n0               6.0            MANUAL  rear wheel drive              2.0   \n1               6.0            MANUAL  rear wheel drive              2.0   \n2               6.0            MANUAL  rear wheel drive              2.0   \n3               6.0            MANUAL  rear wheel drive              2.0   \n4               6.0            MANUAL  rear wheel drive              2.0   \n\n                         Market Category Vehicle Size Vehicle Style  \\\n0  Factory Tuner,Luxury,High-Performance      Compact         Coupe   \n1                     Luxury,Performance      Compact   Convertible   \n2                Luxury,High-Performance      Compact         Coupe   \n3                     Luxury,Performance      Compact         Coupe   \n4                                 Luxury      Compact   Convertible   \n\n   highway MPG  city mpg  Popularity   MSRP  \n0           26        19        3916  46135  \n1           28        19        3916  40650  \n2           28        20        3916  36350  \n3           28        18        3916  29450  \n4           28        18        3916  34500  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Make</th>\n      <th>Model</th>\n      <th>Year</th>\n      <th>Engine Fuel Type</th>\n      <th>Engine HP</th>\n      <th>Engine Cylinders</th>\n      <th>Transmission Type</th>\n      <th>Driven_Wheels</th>\n      <th>Number of Doors</th>\n      <th>Market Category</th>\n      <th>Vehicle Size</th>\n      <th>Vehicle Style</th>\n      <th>highway MPG</th>\n      <th>city mpg</th>\n      <th>Popularity</th>\n      <th>MSRP</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>BMW</td>\n      <td>1 Series M</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>335.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n      <td>Factory Tuner,Luxury,High-Performance</td>\n      <td>Compact</td>\n      <td>Coupe</td>\n      <td>26</td>\n      <td>19</td>\n      <td>3916</td>\n      <td>46135</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>BMW</td>\n      <td>1 Series</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n      <td>Luxury,Performance</td>\n      <td>Compact</td>\n      <td>Convertible</td>\n      <td>28</td>\n      <td>19</td>\n      <td>3916</td>\n      <td>40650</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>BMW</td>\n      <td>1 Series</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n   
   <td>Luxury,High-Performance</td>\n      <td>Compact</td>\n      <td>Coupe</td>\n      <td>28</td>\n      <td>20</td>\n      <td>3916</td>\n      <td>36350</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>BMW</td>\n      <td>1 Series</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n      <td>Luxury,Performance</td>\n      <td>Compact</td>\n      <td>Coupe</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n      <td>29450</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>BMW</td>\n      <td>1 Series</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n      <td>Luxury</td>\n      <td>Compact</td>\n      <td>Convertible</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n      <td>34500</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-05T05:45:38.732831200Z",
     "start_time": "2023-12-05T05:45:38.372485600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [],
   "source": [
    "df.columns = df.columns.str.lower().str.replace(' ','_') # 所有列名小写并用下划线替换空格\n",
    "string_columns = list(df.dtypes[df.dtypes=='object'].index) # 只选择带有字符串值的列\n",
    "for col in string_columns:\n",
    "    df[col] = df[col].str.lower().str.replace(' ','_') # df的所有字符串列的值使用下划线替换空格并采用小写字母"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-05T05:46:41.292511800Z",
     "start_time": "2023-12-05T05:46:41.141509100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "data": {
      "text/plain": "  make       model  year             engine_fuel_type  engine_hp  \\\n0  bmw  1_series_m  2011  premium_unleaded_(required)      335.0   \n1  bmw    1_series  2011  premium_unleaded_(required)      300.0   \n2  bmw    1_series  2011  premium_unleaded_(required)      300.0   \n3  bmw    1_series  2011  premium_unleaded_(required)      230.0   \n4  bmw    1_series  2011  premium_unleaded_(required)      230.0   \n\n   engine_cylinders transmission_type     driven_wheels  number_of_doors  \\\n0               6.0            manual  rear_wheel_drive              2.0   \n1               6.0            manual  rear_wheel_drive              2.0   \n2               6.0            manual  rear_wheel_drive              2.0   \n3               6.0            manual  rear_wheel_drive              2.0   \n4               6.0            manual  rear_wheel_drive              2.0   \n\n                         market_category vehicle_size vehicle_style  \\\n0  factory_tuner,luxury,high-performance      compact         coupe   \n1                     luxury,performance      compact   convertible   \n2                luxury,high-performance      compact         coupe   \n3                     luxury,performance      compact         coupe   \n4                                 luxury      compact   convertible   \n\n   highway_mpg  city_mpg  popularity   msrp  \n0           26        19        3916  46135  \n1           28        19        3916  40650  \n2           28        20        3916  36350  \n3           28        18        3916  29450  \n4           28        18        3916  34500  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>make</th>\n      <th>model</th>\n      <th>year</th>\n      <th>engine_fuel_type</th>\n      <th>engine_hp</th>\n      <th>engine_cylinders</th>\n      <th>transmission_type</th>\n      <th>driven_wheels</th>\n      <th>number_of_doors</th>\n      <th>market_category</th>\n      <th>vehicle_size</th>\n      <th>vehicle_style</th>\n      <th>highway_mpg</th>\n      <th>city_mpg</th>\n      <th>popularity</th>\n      <th>msrp</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>bmw</td>\n      <td>1_series_m</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>335.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>factory_tuner,luxury,high-performance</td>\n      <td>compact</td>\n      <td>coupe</td>\n      <td>26</td>\n      <td>19</td>\n      <td>3916</td>\n      <td>46135</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury,performance</td>\n      <td>compact</td>\n      <td>convertible</td>\n      <td>28</td>\n      <td>19</td>\n      <td>3916</td>\n      <td>40650</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n   
   <td>luxury,high-performance</td>\n      <td>compact</td>\n      <td>coupe</td>\n      <td>28</td>\n      <td>20</td>\n      <td>3916</td>\n      <td>36350</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury,performance</td>\n      <td>compact</td>\n      <td>coupe</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n      <td>29450</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury</td>\n      <td>compact</td>\n      <td>convertible</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n      <td>34500</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-05T05:46:42.671219800Z",
     "start_time": "2023-12-05T05:46:42.375222900Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 探索性分析"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEDCAYAAAA849PJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQpElEQVR4nO3db4xc1XnH8e/O+O8ma09a7NBKKDRp88itStqQAg02WCmJAyGlSkVfoCSkoZRYrojaSDSAqURxRBJR0roRpF2K+NeoaoyiEogBiabI3rZBQVQyCjzIqEmkVpEckrXXNbbxevpiZpVhszszvmtm7D3fz6uZc869c+6zV/Pbe+/MnZFms4kkqTy1YU9AkjQcBoAkFcoAkKRCGQCSVCgDQJIKtWTYE+jX8ePHm9PT1T6xVK+PUHXZklin/lin3qxRfwZRp6VL6z8C1szVd9oEwPR0k8nJQ5WWbTRGKy9bEuvUH+vUmzXqzyDqtGbN2Pfn6/MUkCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFeq0+SbwQr151UpWLp97c189coyDB14d8Iwkabj6CoCIOB/4QmZujIhfBu4DmsDzwJbMPB4R1wLXAceAbZn5aESsBB4C1gJTwNWZuS8iLgD+pj32ycy89WRv2Gwrly/h7M8+Nmff9z7/IQ6+0ROQpFNMz1NAEXEDcA+wot10J7A1MzcAI8AVEXEmcD1wIbAJuD0ilgObgT3tsQ8AW9vr+ApwFbAeOD8i3n3yNkmS1I9+rgG8DHyk4/m5wNPtxzuBS4DzgInMPJKZ+4G9wDm03uAf7xwbEauA5Zn5cmY2gSeA31nwlkiSTkjPU0CZ+XBEnN3RNNJ+44bWaZ3VwCpgf8eYudo72w7MGvv2XvOo10doNEZ7DZtn2d45V3Xdi0m9XrMOfbBOvVmj/gy7TlUuAh/veDwGTNJ6Qx/r0d5rbFcLvR10rVbvOsZb13oL335Zp96sUX8GdDvoefuqfAz0uYjY2H58KbALeAbYEBErImI1sI7WBeIJ4LLOsZl5ADgaEe+IiBFa1wx2VZiHJGkBqhwBfAYYj4hlwAvAjsycjojttN7Ia8DNmXk4Iu4G7o+I3cBRWhd+AT4F/CNQp/UpoG8vdEMkSSdmpNk8PX627bXXppsLOQW0dGm968dA9+2bWsj0FgUP2/tjnXqzRv0Z0CmgZ4H3zNXnN4ElqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQS6osFBFLgfuBs4Fp4FrgGHAf0ASeB7Zk5vGIuBa4rt2/LTMfjYiVwEPAWmAKuDoz9y1sUyRJJ6LqEcBlwJLMfC/wl8DngDuBrZm5ARgBroiIM4HrgQuBTcDtEbEc2AzsaY99ANi6sM2QJJ2oSkcAwEvAkoioAauA14ALgKfb/TuBD9A6OpjIzCPAkYjYC5wDrAe+2DH2ll4vWK+P0GiMVppsvd4756quezGp12vWoQ/WqTdr1J9h16lqABykdfrnReAM4HLgosxstvungNW0wmF/x3Jztc+0dTU93WRy8lClyTYao9Rq9a5jqq57MWk0Rq1DH6xTb9aoP4Oo05o1Y/P2VT0F9KfAE5n5TuBdtK4HLOvoHwMmgQPtx93aZ9okSQNUNQB+wk//g/8xsBR4LiI2ttsuBXYBzwAbImJFRKwG1tG6QDxB6zpC51hJ0gBVPQX0JeDeiNhF6z//m4DvAOMRsQx4AdiRmdMRsZ3WG3wNuDkzD0fE3cD9EbEbOApctdANkSSdmEoBkJkHgT+Y
o+viOcaOA+Oz2g4BV1Z5bUnSyeEXwSSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFWlJ1wYi4EfhdYBlwF/A0cB/QBJ4HtmTm8Yi4FrgOOAZsy8xHI2Il8BCwFpgCrs7MfQvZEEnSial0BBARG4H3AhcCFwNnAXcCWzNzAzACXBERZwLXt8dtAm6PiOXAZmBPe+wDwNYFbock6QRVPQW0CdgDfB34BvAocC6towCAncAlwHnARGYeycz9wF7gHGA98PissZKkAap6CugM4G3A5cAvAY8AtcxstvungNXAKmB/x3Jztc+0dVWvj9BojFaabL3eO+eqrnsxqddr1qEP1qk3a9SfYdepagC8AryYmUeBjIjDtE4DzRgDJoED7cfd2mfaupqebjI5eajSZBuNUWq1etcxVde9mDQao9ahD9apN2vUn0HUac2asXn7qp4C2g18MCJGIuIXgTcBT7WvDQBcCuwCngE2RMSKiFgNrKN1gXgCuGzWWEnSAFU6Amh/kuciWm/wNWAL8N/AeEQsA14AdmTmdERsp/UGXwNuzszDEXE3cH9E7AaOAledhG2RJJ2Ayh8Dzcwb5mi+eI5x48D4rLZDwJVVX1uStHB+EUySCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgq1ZCELR8Ra4Fng/cAx4D6gCTwPbMnM4xFxLXBdu39bZj4aESuBh4C1wBRwdWbuW8hcJEknpvIRQEQsBf4OeLXddCewNTM3ACPAFRFxJnA9cCGwCbg9IpYDm4E97bEPAFurb4IkqYqFnAK6A/gK8L/t5+cCT7cf7wQuAc4DJjLzSGbuB/YC5wDrgcdnjZUkDVClU0AR8QlgX2Y+ERE3tptHMrPZfjwFrAZWAfs7Fp2rfaatq3p9hEZjtMp0qdd751zVdS8m9XrNOvTBOvVmjfoz7DpVvQbwSaAZEZcAv0HrNM7ajv4xYBI40H7crX2mravp6SaTk4cqTbbRGKVWq3cdU3Xdi0mjMWod+mCderNG/RlEndasGZu3r9IpoMy8KDMvzsyNwH8BHwd2RsTG9pBLgV3AM8CGiFgREauBdbQuEE8Al80aK0kaoJP5MdDPALdGxH8Ay4AdmflDYDutN/h/BW7OzMPA3cCvRcRu4I+BW0/iPCRJfVjQx0AB2kcBMy6eo38cGJ/Vdgi4cqGvLUmqzi+CSVKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoZZUWSgilgL3AmcDy4FtwHeB+4Am8DywJTOPR8S1wHXAMWBbZj4aESuBh4C1wBRwdWbuW9imSJJORNUjgI8Cr2TmBuBS4MvAncDWdtsIcEVEnAlcD1wIbAJuj4jlwGZgT3vsA8DWhW2GJOlEVToCAL4G7Oh4fgw4F3i6/Xwn8AFgGpjIzCPAkYjYC5wDrAe+2DH2ll4vWK+P0GiMVppsvd4756quezGp12vWoQ/WqTdr1J9h16lSAGTmQYCIGKMVBFuBOzKz2R4yBawGVgH7Oxadq32m
ravp6SaTk4eqTJdGY5Rard51TNV1LyaNxqh16IN16s0a9WcQdVqzZmzevsoXgSPiLOBbwIOZ+VXgeEf3GDAJHGg/7tY+0yZJGqBKARARbwWeBP48M+9tNz8XERvbjy8FdgHPABsiYkVErAbW0bpAPAFcNmusJGmAql4DuAl4C3BLRMycv/80sD0ilgEvADsyczoittN6g68BN2fm4Yi4G7g/InYDR4GrFrQVkqQTVvUawKdpveHPdvEcY8eB8Vlth4Arq7y2JOnk8ItgklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSpU1buBLiqHX5vu+qMJrx45xsEDrw5wRpL0xjMAgBVL65z92cfm7f/e5z/EwQHOR5IGwVNAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRC+YMwfej2i2H+Wpik05UB0Iduvxjmr4VJOl0NLQAiogbcBbwLOAL8UWbuHdZ8qvL3hCWdroZ5BPB7wIrM/O2IuAD4K+CKIc6nkl6/J/zibR/sGhCHX5tmxdL6nH2Gh6Q30jADYD3wOEBm/mdEvGeIc3nD9POD8/P1LyQ8uvWB4SIJRprN5lBeOCLuAR7OzJ3t5z8A3p6Zx+ZZZB/w/UHNT5IWibcBa+bqGOYRwAGg89/bWpc3f5hnAyRJ1QzzewATwGUA7WsAe4Y4F0kqzjCPAL4OvD8i/h0YAf5wiHORpOIM7RqAJGm4vBWEJBXKAJCkQhkAklSoRXUvoF63l4iIDwN/ARwD7s3M8aFMdIj6qNGfAdfQ+t4FwHWZmQOf6CkiIs4HvpCZG2e1F78vdepSp+L3p4hYCtwLnA0sB7Zl5iMd/UPblxZVANDl9hLtP8KXgN8C/g+YiIhvZOYPhzbb4eh1C453Ax/PzGeHMrtTSETcAHyM1v7S2e6+1GG+OrW5P8FHgVcy82MR8fPAc8AjMPx9abGdAnrd7SWAzttLrAP2ZuZPMvMosBvYMPgpDl23GgGcC9wYEbsj4sZBT+4U8zLwkTna3Zdeb746gfsTwNeAWzqed37hdaj70mILgFXA/o7n0xGxZJ6+KWD1oCZ2CulWI4B/Aj4FvA9YHxGXD3Jyp5LMfBh4bY4u96UOXeoE7k9k5sHMnIqIMWAHsLWje6j70mILgG63l5jdNwZMDmpip5B5axQRI8BfZ+aP2v+NPAb85hDmeKpzX+qD+9NPRcRZwLeABzPzqx1dQ92XFts1gAngw8A/z3F7iReAX4mInwMOAhcBdwx+ikPXrUargOcjYh2t85Hvo3XxSq/nvtQf9ycgIt4KPAn8SWY+Nat7qPvSYguAn7m9RERcBbw5M/++/YmEJ2gd+dybmf8zxLkOS68a3UTrP5UjwFOZ+c0hzvWU4r7UH/enn3ET8BbgloiYuRYwDrxp2PuSt4KQpEIttmsAkqQ+GQCSVCgDQJIKZQBIUqEW26eAJGlRmu9+S7PGfALYDNSBf8nM27qt0yMASTrFte+3dA+wosuYd9B6898InAcsa99raF4eAUjSqW/mfksPAkTErwPbaX2X5xXgk8AlwHeA+4FfAD6XmfPdogPwCECSTnlz3G9pHNjSPh30TeAG4Axa3yS+Bvh94G8jotFtvR4BSNLpZx1wV0QALAVeAr4N/FtmTgFTEfFd4J3AM/OtxACQpNNP0vqdhR9ExIW0TvkksCUiVtC6CPyrwN4u6zAAJOk0tBl4ICLq7efXZOZLEfEPtG74OALclpk/7rYS7wUkSYXyIrAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYX6f/Fc++S7sj2pAAAAAElFTkSuQmCC\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(x=df.msrp,bins=40)# 产看汽车价格的变量分布\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-05T05:46:47.429213300Z",
     "start_time": "2023-12-05T05:46:46.765223100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD3CAYAAAAe5+9lAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAVU0lEQVR4nO3df4zc913n8efu2tldk11vudpY6AI+fr0xlXzV5ZSEOE4sXVrXCSIoUgCVquG4c0MUcFtVpD/sHgpn44TrhcNwDWhDlISAkEioBEZOKpomcgx3blGR0iN9V45w+8ddkUm7/iHHm3i898d3lkzc3ZnZ+bHenc/zIVma+Xw/s/N574xf893P9/v9zNDc3BySpME3fKUHIElaHga+JBXCwJekQhj4klQIA1+SCrHmSg9gIZcuXZqr1ZqfPTQyMkSrPoPIusti3WXptu61a0f+Gdiw2PYVGfi12hwzM+eb9pmaWteyzyCy7rJYd1m6rXvDholvNNvulI4kFcLAl6RCGPiSVAgDX5IKYeBLUiHaOksnIq4HHsrMHRGxEZgG3gGMAB/MzFcjYjdwD3AR2J+ZhyNiHHgK2AicBe7OzFP9KESS1FzLPfyIuB94FBirN/0W8MeZeTOwD/jxiNgE7AG2ATuBgxExCtwLvJyZ24En6/0lSVdAO1M6rwJ3NtzfBvzriPhr4BeAF4DrgGOZOZuZp4ETwFbgJuDZ+uOOALf2aNySpCVqOaWTmc9ExOaGps3AdzLz1oj4L8DHga8Dpxv6nAXWA5MN7fNtLY2MDDE1ta5Fn+GWfQaRdZfFusvS77o7udL2NeAv6rf/EjgAfBmYaOgzAcwAZxra59ta6uZK26snxxkfXbys12cvcu7M6+0MY0XyCsSyWHdZenClbdPtnQT+S8BtwB8BNwP/BzgOHIiIMWAU2AJ8FThW73sc2AUc7eD5lmR8dA2bP/FXi24/+eDtnOv3ICRpBerktMyPAR+MiL8B3gf8ZmZ+CzhEFejPA3sz8wLwCPCuiHgJ+BDwQG+GLUlaqrb28DPzJHBD/fY3gPcs0Gea6nTNxrbzwF1dj1KS1DUvvJKkQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVIi2vuIwIq4HHsrMHQ1t7wd+NTN/sn5/N3APcBHYn5mHI2IceArYCJwF7s7MU70tQZLUjpZ7+BFxP/AoMNbQ9m7gPwFD9fubgD3ANmAncDAiRoF7gZczczvwJLCv1wVIktrTzpTOq8Cd83ci4l8BDwIfaehzHXAsM2cz8zRwAtgK3AQ8W+9zBLi1F4OWJC1dyymdzHwmIjYDRMQI8IfAR4HXG7pNAqcb7p8F1l/WPt/W0sjIEFNT61r0GW7ZZzGdPm4l6Kbu1cy6y2Ld/dHWHH6Da4EfBR6hmuL5iYj4H8DzwERDvwlgBjjT0D7f1lKtNsfMzPmmfaam1i3YZ8OGiQV6v12rn72SLVb3oLPuslh3Z1rl35ICPzOPA+8CqO/1/2lmfqQ+h38gIsaAUWAL8FXgGHAbcBzYBRxd4vglST3Sk9MyM/NbwCGqQH8e2JuZF6j+EnhXRLwEfAh4oBfPJ0laurb28DPzJHBDs7bMnAamL+tzHrir20FKkrrnhVeSVAgDX5IKYeBLUiEMfEkqhIEvSYUw8CWpEAa+JBXCwJekQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgrR1lccRsT1wEOZuSMi3g38LlADZoEPZuY/RcRu4B7gIrA/Mw9HxDjwFLAROAvcnZmn+lGIJKm5lnv4EXE/8CgwVm/6HeBXM3MH8OfAxyNiE7AH2AbsBA5GxChwL/ByZm4HngT29bwCSVJb2pnSeRW4s+H+
z2fm39dvrwEuANcBxzJzNjNPAyeArcBNwLP1vkeAW3syaknSkrWc0snMZyJic8P9/wcQETcCvwLcTLVXf7rhYWeB9cBkQ/t8W0sjI0NMTa1r0We4ZZ/FdPq4laCbulcz6y6LdfdHW3P4l4uInwP2Ardn5qmIOANMNHSZAGaAxvb5tpZqtTlmZs437TM1tW7BPhs2TCzQ++1a/eyVbLG6B511l8W6O9Mq/5Yc+BHxAaqDszsy89v15uPAgYgYA0aBLcBXgWPAbfXtu4CjS30+SVJvLCnwI2IEOAR8E/jziAB4MTN/PSIOUQX6MLA3My9ExCPAExHxEvAG8P6ejl6S1La2Aj8zTwI31O9+7yJ9poHpy9rOA3d1MT5JUo944ZUkFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhIEvSYUw8CWpEAa+JBXCwJekQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEK09RWHEXE98FBm7oiIHwEeB+aovqj8vsy8FBG7qb7c/CKwPzMPR8Q48BSwETgL3J2Zp/pQhySphZZ7+BFxP/AoMFZvehjYl5nbgSHgjojYBOwBtgE7gYMRMQrcC7xc7/sksK/3JUiS2tHOlM6rwJ0N968FXqzfPgLcClwHHMvM2cw8DZwAtgI3Ac9e1leSdAW0nNLJzGciYnND01BmztVvnwXWA5PA6YY+C7XPt7U0MjLE1NS6Fn2GW/ZZTKePWwm6qXs1s+6yWHd/tDWHf5lLDbcngBngTP12s/b5tpZqtTlmZs437TM1tW7BPhs2TCzQ++1a/eyVbLG6B511l8W6O9Mq/zo5S+crEbGjfnsXcBQ4DmyPiLGIWA9soTqgewy47bK+kqQroJPA/xjwQET8LXAV8HRmfgs4RBXozwN7M/MC8Ajwroh4CfgQ8EBvhi1JWqq2pnQy8yRwQ/3214FbFugzDUxf1nYeuKvrUUqSuuaFV5JUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiE6WR5Zq8jVk+OMjy7+Mr8+e5FzZ15fxhFJulIM/AE3PrqGzZ/4q0W3n3zwds4t43gkXTlO6UhSIQx8SSqEgS9JhTDwJakQBr4kFaKjs3QiYi3wBLAZqAG7gYvA48Ac1ReY35eZlyJiN3BPffv+zDzc/bAlSUvV6R7+bcCazLwR+A3gAPAwsC8ztwNDwB0RsQnYA2wDdgIHI2K0+2FLkpaq0/Pwvw6siYhhYBJ4k+pLzl+sbz8CvJdq7/9YZs4CsxFxAtgKfKmrUatnLrxZY8OGiabbx9aOLLjNi7ak1aXTwD9HNZ3zNeCdwE8BN2fmXH37WWA91YfB6YbHzbc3NTIyxNTUuhZ9hlv2WUynj1sJuql7IWNrR1pemLXY9pMP3s6aZfpd9rru1cK6y9LvujsN/I8Cz2XmJyPiGuB54KqG7RPADHCmfvvy9qZqtTlmZs437TM1tW7BPs32Vue1+tkr2UJ1t1o+oZ+W63e52Os96Ky7LN3W3Sr/Ok2J71BN4wB8G1gLfCUidmTmC8Au4IvAceBARIwBo8AWqgO66qFmyyecfPD2ZR6NpJWq08D/beCxiDhKtWf/KeDLwHREXAW8AjydmbWIOAQcpTpAvDczL/Rg3JKkJeoo8DPzHPCzC2y6ZYG+08B0J88jSeodL7ySpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQfom5OtZq4TUXV5NWFgNfHWtn4bVzyzgeSc05pSNJhTDwJakQBr4kFcLAl6RCGPiSVAjP0lHfNDtt01M2peVn4Ktvmp226Smb0vJzSkeSCmHgS1IhOp7SiYhPAj9N9Z22nwVeBB4H5qi+qPy+zLwUEbuBe4CLwP7MPNztoCVJS9fRHn5E7ABuBLZRfY/tNcDDwL7M3A4MAXdExCZgT73fTuBgRIz2YNySpCXqdA9/J/Ay8DlgEvg1YDfV
Xj7AEeC9QA04lpmzwGxEnAC2Al/qZtCluXpynPHRt16qZguWSdJiOg38dwI/CPwU8G+AvwCGM3Ouvv0ssJ7qw+B0w+Pm25saGRliampdiz7DLfssptPHXSlr21ikbDVayuvQzeu9mll3Wfpdd6eB/xrwtcx8A8iIuEA1rTNvApgBztRvX97eVK02x8zM+aZ9pqbWLdinnb3fVj97pRnUPfqlvA6Lvd6DzrrL0m3drbKi07N0XgLeFxFDEfH9wPcAX6jP7QPsAo4Cx4HtETEWEeuBLVQHdCVJy6yjPfzMPBwRN1MF+jBwH/CPwHREXAW8AjydmbWIOEQV/sPA3sy80JuhS5KWouPTMjPz/gWab1mg3zQw3enzSJJ6wwuvJKkQBr4kFcLF03RF+AXo0vIz8HVF+AXo0vJzSkeSCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEa+loRVpocbXG+y6uJi2dga8VycXVpN4z8FeIqyfHGR/15ZDUP10lTERsBP4OeA9wEXgcmKP6ovL7MvNSROwG7qlv35+Zh7sa8YAaH12z6B7tyQdvX+bRSBpEHR+0jYi1wB8A8xOpDwP7MnM7MATcERGbgD3ANmAncDAiRrsbsvTWHP9C/66eHL/Sw5NWpG728D8D/D7wyfr9a4EX67ePAO8FasCxzJwFZiPiBLAV+FIXzys1neN3fl9aWEeBHxG/CJzKzOciYj7whzJzrn77LLAemARONzx0vr2pkZEhpqbWtegz3LLPYjp9nFaPQXmNu3mfr2bW3R+d7uH/EjAXEbcC7waeBDY2bJ8AZoAz9duXtzdVq80xM3O+aZ+pqXUL9mn2PanzWv3sK6Gdcat9K/E17sRi7/NBZ92daZUjHQV+Zt48fzsiXgB+GfhvEbEjM18AdgFfBI4DByJiDBgFtlAd0JUkLbNengf4MWA6Iq4CXgGezsxaRBwCjlIdIN6bmRd6+JySpDZ1HfiZuaPh7i0LbJ8Gprt9HklSd1xLR5IKYeBLUiEMfEkqhIu3aOAstNJmI1faVKkM/GXi4mjLx5U2pYWZQMuk2eJo4AJpkvrPOXxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCE/LVHGaXZjlRVkaZAa+iuPXI6pUTulIUiEMfEkqhFM6UgMXXtMgM/ClBi68pkHWUeBHxFrgMWAz1ZeT7wf+AXgcmKP6ovL7MvNSROwG7gEuAvsz83D3w5YkLVWnc/gfAF7LzO3ALuD3gIeBffW2IeCOiNgE7AG2ATuBgxEx2v2wJUlL1emUzp8BTzfcvwhcC7xYv38EeC9QA45l5iwwGxEngK3Alzp8XklShzoK/Mw8BxARE1TBvw/4TGbO1bucBdYDk8DphofOtzc1MjLE1NS6Fn2GW/ZZTKePk2B53z/dvM9XM+vuj44P2kbENcDngM9m5p9ExG81bJ4AZoAz9duXtzdVq80xM3O+aZ+pqXUL9ml2hsW8Vj+7H9oZl1aH5Xz/LPY+H3TW3ZlWOdPpQdvvAz4P/EpmfqHe/JWI2JGZL1DN638ROA4ciIgxqoO7W6gO6EqrkqdtajXrdA//U8A7gE9HxKfrbR8GDkXEVcArwNOZWYuIQ8BRqgPEezPzQreDlq4UT9vUatbpHP6HqQL+crcs0HcamO7keSRJvePSCpJUCK+0lXrIpZe1khn4Ug+59LJWMqd0JKkQBr4kFcLAl6RCOIcvrRJXT44zPrrwf1kPCKsdBr60SoyPrvGAsLpi4PdIs70vCVovy3DhzRpja0e+q72ddZhc8kHtMKF6pNneF1R7YCpbO8sydPoecskHtcODtpJUCANfkgrhlI5UAJd8EBj4UhGazfF/7b++zwO+hTDwpcJ5wLccxQW+f9pKvdXNBWHNHnv15HjHj23nuUtUXOC7mqG0NK3O8Qc6ni7q12Oh+f/nUj8sigt8SUvTzpTPSnsstP6gavazm33YLHaB
3LyV/GHR98CPiGHgs8C/BWaB/5yZJ/r9vP3g1bTS6tHqr/luHtvphwU0/8C48Gat6bi6tRzp9TPAWGb+ZETcAPx34I5leN4l6+ZPV6+klQTdXVF98sHbOduvgbE8gX8T8CxAZv6viPj3y/CcHen2T0hJWsmG5ubm+voEEfEo8ExmHqnf/ybwQ5l5scnDTgHf6OvAJGnw/CCwYbGNy7GHfwZonCcZbhH20GTAkqTOLMdaOseA2wDqc/gvL8NzSpIusxx7+J8D3hMRfwMMAf9xGZ5TknSZvs/hS5JWBpdHlqRCGPiSVAgDX5IKsarWCRikZRoiYi3wGLAZGAX2A/8APA7MAV8F7svMSxGxG7gHuAjsz8zDETEOPAVsBM4Cd2fmqfqZUL9T7/v5zHxgWQtrU0RsBP4OeA/VWB9nwOuOiE8CPw1cRfU+fpEy6l4LPEH1Xq8Buxnw1zwirgceyswdEfEj9KnWiPh14PZ6+0cy83izca22Pfx/WaYB+ATVMg2r1QeA1zJzO7AL+D3gYWBfvW0IuCMiNgF7gG3ATuBgRIwC9wIv1/s+Ceyr/9zfB95PdYXz9RHx75axprbUA+APgPkVpga+7ojYAdxIVc8twDUUUHfdbcCazLwR+A3gAANce0TcDzwKjNWb+lJrvd5bgOuBnwf+Z6uxrbbAf9syDcCKXaahDX8GfLrh/kXgWqq9PoAjwK3AdcCxzJzNzNPACWArDb+L+b4RMQmMZuarmTkHPAf8h75XsnSfoXoD/9/6/RLq3kl1DcrngL8EDlNG3QBfB9bU/0KfBN5ksGt/Fbiz4X6/ar2Jam9/LjO/SfU7bnrR6moL/EngdMP9WkSsqmmpeZl5LjPPRsQE8DTVJ/lQ/QWF6s+59Xx3zQu1N7adWaDvihERvwicysznGpoHvm7gnVQ7KHcBvwz8MdVV54NeN8A5qumcrwHTwCEG+DXPzGeoPtTm9avWxX7GolZb4HeyTMOKFRHXAF8E/igz/wS41LB5Apjhu2teqL1V35Xkl6guxHsBeDfVn60bG7YPat2vAc9l5huZmcAF3v6fc1DrBvgoVe0/RnX87Qmq4xjzBrl26N//6yX/DlZb4A/MMg0R8X3A54GPZ+Zj9eav1Od6oZrXPwocB7ZHxFhErAe2UB34+ZffxXzfzDwDvBERPxwRQ1TTCEeXpaA2ZebNmXlLZu4A/h74IHBk0OsGXgLeFxFDEfH9wPcAXyigboDv8Nae6LeBtRTwXm/Qr1qPATsjYjgifoBqB/ifmw1ktU2HDNIyDZ8C3gF8OiLm5/I/DByKiKuAV4CnM7MWEYeoXuBhYG9mXoiIR4AnIuIl4A2qAzrw1nTBCNX83v9evpI69jFgepDrrp+BcTPVf/Rh4D7gHxnwuut+G3gsIo5S7dl/CvgyZdQOfXx/13+nf8tb76mmXFpBkgqx2qZ0JEkdMvAlqRAGviQVwsCXpEIY+JJUCANfkgph4EtSIf4/YXqUyx1wEckAAAAASUVORK5CYII=\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(x=df.msrp[df.msrp<1e5],bins=40)# 产看汽车价格的变量分布\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-05T05:46:50.584158400Z",
     "start_time": "2023-12-05T05:46:50.144160300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD3CAYAAAAT+Z8iAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAPCElEQVR4nO3dbYxcZ3mH8Wu9fhe2tyqbRqoqLATcCkgpUmh5Sdz4A2+JoSm0aaOIklIIUZQSQFQQiNM2bdqYiIbiIkJrGpI0qlo1gapKZGgl2pAYqqgIqqTADaEKfCnIfdnYxvaC19MPc1Zab8c7s+udObN3rp9k+ZznPLPPfcZn/nv8nDMzE51OB0lSXevaLkCSNFwGvSQVZ9BLUnEGvSQVZ9BLUnHr2y6gl9OnT3fm5tq7G2hycoI2x18u6x0u6x0u610dnU6HjRvXfx54/eJtYxn0c3MdZmaOtzb+1NTWVsdfLusdLusdLutdPdPT257bq92pG0kqzqCXpOIMekkqzqCXpOIMekkqzqCXpOIMekkqzqCXpOIMekkqbizfGau14Tnbt7Bl08oOoenpbec09onZUxw7cuKcfob0bGHQa8W2bFrPzpsebmXsp/ft4VgrI0trj1M3klScQS9JxRn0klScQS9JxRn0klScQS9JxRn0klScQS9JxRn0klScQS9JxRn0klScQS9JxRn0klScQS9JxRn0klScQS9JxRn0klScQS9JxRn0klScQS9JxRn0klScQS9JxRn0klScQS9Jxa0fpFNEnAd8BXgNcAq4B+gATwI3ZObpiLgWuK7ZfltmPhQRW4D7gfOAo8A1mXl41fdCknRWfYM+IjYAfwacaJruBPZm5j9HxCeBKyLiy8CNwMuAzcBjEfGPwPXAE5n5exFxFbAXePcQ9kPPMid/PMf09LaRjTc/1onZUxw7cqJPb2m8DHJG/xHgk8AHm/WLgEea5YPAa4E54FBmzgKzEfEUcCFwCXDHgr63rFLdepbbvGGSnTc9PPJxn963h2MjH1U6N0sGfUT8BnA4Mz8fEfNBP5GZnWb5KLAD2A48s+Chvdrn2/qanJxgamrrQDswDJOT61odf7nWWr1r3bg/12vteLDe4et3Rv+bQCciXg28FLiP7nz7vG3ADHCkWV6qfb6tr7m5DjMzxwfpOhRTU1tbHX+52qp3lFMn42Tcjw2P3+Ea53rP9ppc8q6bzPyFzLw0M3cDXwPeChyMiN1Nl8uAR4HHgV0RsTkidgAX0L1Qewi4fFFfSdIIreT2yvcBtzYXYDcCD2Tm94H9dIP8C8DNmXkSuAt4SUQ8BrwTuHV1ypYkDWqg2ysBmrP6eZf22H4AOLCo7Thw5UqLkySdO98wJUnFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFGfSSVJxBL0nFre/XISImgQNAAHPA24AJ4B6gAzwJ3JCZpyPiWuA64BRwW2Y+FBFbgPuB84CjwDWZeXgI+yJJ6mGQM/o3AmTmxcDvAHc2f/Zm5i66oX9FRJwP3AhcDLwOuD0iNgHXA080fe8D9q76XkiSzqrvGX1m/l1EPNSsPg/4AbAHeKRpOwi8lu7Z/qHMnAVmI+Ip4ELgEuCOBX1v6Tfm5OQEU1Nbl7Mfq2pycl2r4y/XWqt3rRv353qtHQ/WO3x9gx4gM09FxL3Am4BfAd6QmZ1m81FgB7AdeGbBw3q1z7ctaW6uw8zM8YF2YBimpra2Ov5ytVXv9PS2kY85Dsb92PD4Ha5xrvdsr8mBL8Zm5jXAi+jO129ZsGkbMAMc
aZaXap9vkySNSN+gj4hfj4gPNqvHgdPAv0bE7qbtMuBR4HFgV0RsjogdwAV0L9QeAi5f1FeSNCKDTN18Bvh0RHwR2AC8B/gGcCAiNjbLD2TmXETspxvk64CbM/NkRNwF3BsRjwE/Aq4exo5Iknob5GLsD4Ff7bHp0h59D9Cd2lnYdhy4cqUFSpLOjW+YkqTiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKm79UhsjYgNwN7AT2ATcBnwduAfoAE8CN2Tm6Yi4FrgOOAXclpkPRcQW4H7gPOAocE1mHh7OrkiSeul3Rv8W4L8zcxdwGfBx4E5gb9M2AVwREecDNwIXA68Dbo+ITcD1wBNN3/uAvcPZDUnS2fQL+r8Fblmwfgq4CHikWT8IvBr4eeBQZs5m5jPAU8CFwCXA5xb1lSSN0JJTN5l5DCAitgEP0D0j/0hmdpouR4EdwHbgmQUP7dU+39bX5OQEU1NbB9yF1Tc5ua7V8ZdrrdW71o37c73WjgfrHb4lgx4gIn4G+Czwicz8q4i4Y8HmbcAMcKRZXqp9vq2vubkOMzPHB+k6FFNTW1sdf7naqnd6elv/TgWN+7Hh8Ttc41zv2V6TS07dRMRPAf8AfCAz726avxoRu5vly4BHgceBXRGxOSJ2ABfQvVB7CLh8UV9J0gj1O6P/EPATwC0RMT9X/25gf0RsBL4BPJCZcxGxn26QrwNuzsyTEXEXcG9EPAb8CLh6KHshSTqrfnP076Yb7Itd2qPvAeDAorbjwJXnUqAk6dz4hilJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6TiDHpJKs6gl6Ti+n7D1FrznO1b2LLp3Hdrud+edGL2FMeOnDjncSVptZUL+i2b1rPzpodHPu7T+/ZwbOSjSlJ/Tt1IUnEGvSQVZ9BLUnEGvSQVZ9BLUnEGvSQVZ9BLUnEGvSQVZ9BLUnEGvSQVZ9BLUnEGvSQVZ9BLUnEGvSQVZ9BLUnEGvSQVZ9BLUnEGvSQVZ9BLUnEGvSQVZ9BLUnHrB+kUES8HPpyZuyPiBcA9QAd4ErghM09HxLXAdcAp4LbMfCgitgD3A+cBR4FrMvPwEPZDknQWfc/oI+L9wKeAzU3TncDezNwFTABXRMT5wI3AxcDrgNsjYhNwPfBE0/c+YO/q74IkaSmDnNF/B3gz8JfN+kXAI83yQeC1wBxwKDNngdmIeAq4ELgEuGNB31sGKWpycoKpqa0D7cA4aavmycl1a/L5WqvG/blea8eD9Q5f36DPzAcjYueCponM7DTLR4EdwHbgmQV9erXPt/U1N9dhZub4IF3/n+npbSt63GpYac3nampqaytjt/lct6mtf+dBtXU8rJT1rp6zvSZXcjH29ILlbcAMcKRZXqp9vk2SNEIrCfqvRsTuZvky4FHgcWBXRGyOiB3ABXQv1B4CLl/UV5I0QisJ+vcBt0bEl4GNwAOZ+X1gP90g/wJwc2aeBO4CXhIRjwHvBG5dnbIlSYMa6PbKzHwaeEWz/C3g0h59DgAHFrUdB6485yolSSvmG6YkqTiDXpKKM+glqTiDXpKKM+glqTiDXpKKG+j2SkldJ38819pHP5yYPcWxIydaGVtrm0EvLcPmDZPsvOnhVsZ+et8ejrUystY6p24kqTiDXpKKM+glqTiDXpKKM+glqTiDXpKKM+glqTiDXpKKM+glqTiDXpKKM+glqTiDXpKKM+glqTiDXpKKM+glqTg/j76AOWjtyzAkjT+DvoC2vgzj6X17Rj7ms9lyvt1qNX/x+81Wa59BL60R
bf5C95ut1jbn6CWpOINekooz6CWpOINekooz6CWpOINekorz9kpJS1rO/fsr1evne//+6jHoJS3J+/fXPqduJKk4g16Sihv61E1ErAM+AfwsMAu8IzOfGva4ozaKeUxJWolRzNH/ErA5M18ZEa8A/hi4YgTjjlRb85jgh4tJWtoopm4uAT4HkJn/ArxsBGNKkhoTnU5nqANExKeABzPzYLP+PeD5mXlqiYcdBr471MIkqZb/av5+/eINo5i6OQIsnLxe1yfkAaaHWI8kPauMYurmEHA5QDNH/8QIxpQkNUZxRv9Z4DUR8SVgAnjbCMaUJDWGPkcvSWqXb5iSpOIMekkqzqCXpOL89MoFImIDcC+wE5gDrs3Mb7Za1BIiYhPwaeD5dG9jvSEzv91uVb1FxMuBD2fm7oh4AXAP0AGepFv36TbrW2xhvc36m4ArM/PqVgvrYdFz+1LgT+kev7PAWzPzB60WuMiiel8M/DndGzX+DXhXZs61WuAii4+Fpu1qurW+srXClsEz+jNdDqzPzFcBvw/8Ycv19HMtcCwzXwG8C/h4y/X0FBHvBz4FbG6a7gT2ZuYuui/wsfpIjMX1RsTHgNsZw9dLj+f2Y3QDaDfwGeADLZXWU496/wj4UGZeDGwFfrGt2nrpUS/NL9O30z1214SxO3Bb9i1gffNBbNuBH7dcTz8vBg4CZGYCF7Rbzll9B3jzgvWLgEea5YPAq0de0dIW1/sl4PqWaulnca1XZebXmuX1wMnRl7SkxfX+cmZ+MSI2AucDY/W/DxbVGxE/CewD3tNaRStg0J/pGN1pm28CB4D9rVbT39eAN0TERPNmtJ+OiMm2i1osMx/kzF+aE5k5f1/vUWDH6Ks6u8X1Zubf0J1mGjs9av1PgIh4FfBbwEdbKq2nHvXORcTzgH8HngtkW7X1srDe5rX1F8B76R63a4ZBf6b3Ap/PzBfR/VjleyNic5/HtOluunPz/wS8EfjKuM1vnsXC+fhtwExbhVQUEb8GfBLYk5mH266nn8z8bma+kG7Nd7ZdzxIuAl4I3AX8NfDiiPiTdksajEF/pv8FnmmW/wfYAIzdGfICPwc81szHfhb4j3bLGdhXI2J3s3wZ8GiLtZQSEW+heya/OzPH/niIiL+PiBc2q0c58yRgrGTm45n5kub1dhXw9cxcE1M43nVzpo8Cd0fEo8BGuheJfthyTUv5NvAHEfHbdM+K395yPYN6H3CgmZf9BvBAy/WU0Ewt7Ae+B3wmIgAeyczfbbWwpe0D7omIHwHHgXe0XE9JfgSCJBXn1I0kFWfQS1JxBr0kFWfQS1JxBr0kFWfQS1JxBr0kFfd/UOT2SWv2fIkAAAAASUVORK5CYII=\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "log_price = np.log1p(df.msrp)\n",
    "plt.hist(x=log_price)\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:30:44.607056500Z",
     "start_time": "2023-12-03T07:30:44.359062700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "data": {
      "text/plain": "make                    0\nmodel                   0\nyear                    0\nengine_fuel_type        3\nengine_hp              69\nengine_cylinders       30\ntransmission_type       0\ndriven_wheels           0\nnumber_of_doors         6\nmarket_category      3742\nvehicle_size            0\nvehicle_style           0\nhighway_mpg             0\ncity_mpg                0\npopularity              0\nmsrp                    0\ndtype: int64"
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isnull().sum()# 查看是否存在缺失值"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:30:48.677982400Z",
     "start_time": "2023-12-03T07:30:48.587463500Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 设计验证框架"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [],
   "source": [
    "n = len(df) ## 获取df中的行数\n",
    "n_val = int(0.2*n) # 验证集\n",
    "n_test = int(0.2*n) # 测试集\n",
    "n_train = n - n_val-n_test # 训练集\n",
    "\n",
    "\n",
    "np.random.seed(2)\n",
    "idx = np.arange(n)\n",
    "np.random.shuffle(idx) # 创建一个索引从0到n-1的数组并使其无序\n",
    "df_shuffled = df.iloc[idx] # 使用带索引的数组来获得一个无序的DataFrame\n",
    "\n",
    "df_train = df_shuffled.iloc[0:n_train].copy()\n",
    "df_val = df_shuffled.iloc[n_train:n_train+n_val].copy()\n",
    "df_test = df_shuffled.iloc[n_train+n_val:].copy()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:31:12.505694100Z",
     "start_time": "2023-12-03T07:31:12.486711600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [],
   "source": [
    "y_train = np.log1p(df_train.msrp.values)\n",
    "y_val = np.log1p(df_val.msrp.values) # 对目标变量取对数以便消除长尾\n",
    "y_test = np.log1p(df_test.msrp.values)\n",
    "del df_train['msrp']\n",
    "del df_val['msrp']\n",
    "del df_test['msrp']"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:31:23.573343800Z",
     "start_time": "2023-12-03T07:31:23.554338700Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 训练线性回归模型"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "outputs": [],
   "source": [
    "def train_linear_regression(X,y):\n",
    "    # 添加偏置项\n",
    "    ones = np.ones(X.shape[0]) # 创建只包含1的数组\n",
    "    X = np.column_stack([ones,X]) # 将1的数组添加到第一列\n",
    "    XTX = X.T.dot(X) # 计算X‘X\n",
    "    XTX_inv = np.linalg.inv(XTX) #计算X’X的逆\n",
    "    w = XTX_inv.dot(X.T).dot(y) # 计算标准方程式的其余部分\n",
    "    return w[0],w[1:]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:51:46.651920300Z",
     "start_time": "2023-12-03T07:51:46.629922200Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 预测价格"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "data": {
      "text/plain": "       engine_hp  engine_cylinders  highway_mpg  city_mpg  popularity\n2735       148.0               4.0           33        24        1385\n6720       132.0               4.0           32        25        2031\n5878       148.0               4.0           37        28         640\n11190       90.0               4.0           18        16         873\n4554       385.0               8.0           21        15        5657",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>engine_hp</th>\n      <th>engine_cylinders</th>\n      <th>highway_mpg</th>\n      <th>city_mpg</th>\n      <th>popularity</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2735</th>\n      <td>148.0</td>\n      <td>4.0</td>\n      <td>33</td>\n      <td>24</td>\n      <td>1385</td>\n    </tr>\n    <tr>\n      <th>6720</th>\n      <td>132.0</td>\n      <td>4.0</td>\n      <td>32</td>\n      <td>25</td>\n      <td>2031</td>\n    </tr>\n    <tr>\n      <th>5878</th>\n      <td>148.0</td>\n      <td>4.0</td>\n      <td>37</td>\n      <td>28</td>\n      <td>640</td>\n    </tr>\n    <tr>\n      <th>11190</th>\n      <td>90.0</td>\n      <td>4.0</td>\n      <td>18</td>\n      <td>16</td>\n      <td>873</td>\n    </tr>\n    <tr>\n      <th>4554</th>\n      <td>385.0</td>\n      <td>8.0</td>\n      <td>21</td>\n      <td>15</td>\n      <td>5657</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "base = ['engine_hp','engine_cylinders','highway_mpg','city_mpg','popularity']\n",
    "df_num = df_train[base]\n",
    "df_num.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:45:43.983980600Z",
     "start_time": "2023-12-03T07:45:43.977977800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "outputs": [
    {
     "data": {
      "text/plain": "make                    0\nmodel                   0\nyear                    0\nengine_fuel_type        1\nengine_hp              40\nengine_cylinders       14\ntransmission_type       0\ndriven_wheels           0\nnumber_of_doors         6\nmarket_category      2251\nvehicle_size            0\nvehicle_style           0\nhighway_mpg             0\ncity_mpg                0\npopularity              0\ndtype: int64"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.isnull().sum()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:47:19.366823700Z",
     "start_time": "2023-12-03T07:47:19.312824800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "outputs": [],
   "source": [
    "df_num = df_num.fillna(0) # 缺失值用0来代替"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:47:48.790240900Z",
     "start_time": "2023-12-03T07:47:48.770212900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "outputs": [
    {
     "data": {
      "text/plain": "engine_hp           0\nengine_cylinders    0\nhighway_mpg         0\ncity_mpg            0\npopularity          0\ndtype: int64"
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_num.isnull().sum()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:48:25.829152800Z",
     "start_time": "2023-12-03T07:48:25.798121900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [],
   "source": [
    "X_train = df_num.values # 将dataframe转化为numpy数组"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:49:06.632332Z",
     "start_time": "2023-12-03T07:49:06.611295800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "outputs": [],
   "source": [
    "w_0,w = train_linear_regression(X_train,y_train)\n",
    "y_pred = w_0+X_train.dot(w)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:51:52.799218200Z",
     "start_time": "2023-12-03T07:51:52.688220700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [
    {
     "data": {
      "text/plain": "<matplotlib.legend.Legend at 0x1dd948d9070>"
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD3CAYAAAAT+Z8iAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAYL0lEQVR4nO3de3SU9Z3H8fdkkkCCgaQQiroUtOJ3sT3aCouXquWcdtsi63pp7Xbd1guIymoF14pdwRWrVmyVVuqKGhC0bre2WuuttHR7uhboRRa1B1r7tajBtrt6EEi4hYRMZv+YCQ4xl8lkLk9+fF7neM7MM8/M85mLH5785nl+E0smk4iISLjKSh1AREQKS0UvIhI4Fb2ISOBU9CIigVPRi4gErrzUAbrT0dGRTCSicTRQPB4jKlkyRTUXRDebcvVfVLNFNReUNltFRfxtoL7r8kgWfSKRpKlpb6ljAFBbWx2ZLJmimguim025+i+q2aKaC0qbrb6+Zkt3yzV0IyISOBW9iEjgVPQiIoGL5Bi9iIQvkWhnx46ttLe39fu+b70VI6rTtxQjW3l5JXV19cTj2VW4il5ESmLHjq0MHVrNsGFjiMVi/bpvPF5GItFRoGQDU+hsyWSSPXt2smPHVkaNOjyr+2joRkRKor29jWHDhve75A91sViMYcOG9+svIRW9iJSMSj43/X3dNHQjIpFw2PAqqobkr5JaWtvZvbMlb483mKnoRSQSqoaUM/7Lz+bt8RoXTWd33h6tZzfd9K+cffanaWtr46233uS88z7T7XpPPvkDpk//e15//VXWrv0Fl1wyqwjpUlT00qeRI8opq6zq133q62v6vZ2Otha2Nbf3+34iUXDyyaf2evu3v72CT31qOhMmGBMmWJFSpajopU9llVWwcETht7OwGdhV8O2IdPrRj55mzZrn2Lt3D01NTVxyyaUsX34/Y8eOo6Kiguuuu4FFi75Cc3MzAHPnXsf7338Mjz/+PZ555oeMHDmKHTt2HHisLVsaueqqOaxcuYw1a54jkUhwzjmfprw8zvbt21i48AbOP/8fefLJx7n55ttZvXoV3/vef1JRUcHYse9j3rz5rF69il/9ah2trfv4y1/+zD/900WceeZZA3qeKnoROaS1tOzlG9/4d5qadjBr1kV0dHRw8cUzOfbYv+bee5cwadIUzj33M/zpT2/w1a/ezNe/fjff//53efjh71JWVsbMmZ8/6PHc/8BvfvNLHnhgJfv37+e+++5hzpxrWblyOQsXfpXf/W4jAM3NTSxffj8rVvwH1dXDWLLkLp588nGqqqrZs2c3ixffw5/+9AbXX3+Nil5EZCA+9KETKSsr4z3vGUlNzXC2bHmd971vPACvvbaZF174H372s9UA7Nq1iy1bGjnqqKOprKwEYOLEDxz0eG+8sYWJEz9APB4nHo8zd+6Xut3u//7vXzjqqKOprh4GwAknnMj69b/muOM+yDHHHAvA6NHvpa2t/yeUdaXDK0XkkOb+BwC2b9/Gnj17qKt7z4HDF8eNG89nP3sB99zzALfcsohPfOJTHHHEkTQ2vkZr6z4SiQSvvOIHPd64ceN55RWno6OD9vZ25s79Z9ra2ojFyg46Y/bww4+ksfF1WlpSRwa99NILjB37PiD/h51qj15EIqGltZ3GRdPz+njZ2L59G3PmzGb37t1ce+313Hnn7Qduu/DCGSxadAtPPfUD9u7dw4wZl1FXV8ell17BFVfMoLa2jqqqgw9UOPZY46STTmH27Jl0dHRw7rmfobKykhNO+BBf+tLVzJhxGQC1tbXMmHE5V199ObFYGX/1V2O54oqrDvz1kE+xKM4XsX9/IhmVuaajOu91MXPV19cU5ctYFjZnvWoux0jrvey/QmZ7880tjBkzLqf75muagc4vUGfP/uKAH6tTsaZn6O71q6+v2QBM7rqu9uglUrI9jrpYx0iLhEBF
H4B8n1EocqgY6NEsg4XaIQD5PqOwq3yOm4pI8emoGxGRwKnoRUQCp6EbEYmEXOZU6o3mTnqHil5EIiHfcyr1NXdSa2srq1ev4qyzzsnbNjsfd9WqZ/P+uAOhoRsROSRt376Np5/+4aB53IHQHr2IHJIefvhBGhtfZ8WKBl5++fe0tbWyc2czF188izPOmMoXvvDZA7NYXnPNPG6+eT779+9n7NhxvPDCeh599Ie8+OIGHnjgXuLxOEcccSTz5s1n5crlBx63mHPO90ZFLyKHpAsvnMGrr27mgx88nhNO+DAnnjiZjRt/y/Ll93PGGVNpaWk5MIvlkiV3cfrpUznvvPNZv/7XrF//a5LJJHfccRtLly6jru49NDQs5Uc/epqLL57J5s1/jEzJQx9Fb2YVwIPAeGAIcCvwZ+Bp4I/p1Za6+6NmNgu4HGgHbnX3Z8ysCngEGE1qsOwid99aiCciIpKLkSNH8dBDy3n22SeBGO3t73yB2zmLZWNjI9Om/R0Axx//YQCamnawbdvb3Hjjl4HU2PyUKScXNXu2+tqj/zywzd2/YGYjgReBrwCL3f2uzpXMbAxwNak5FoYCa83sp8BsYKO7LzSzzwELgDkFeB4iIv2Smk2yg2XL7uOss87hlFM+wrPPPsWqVc9krJOaRfLoo9/Ppk0bmTDBDswnP2JELaNHj2bRosUcdthhrF37HFVV1cRiMZLJws910x99Ff33gccyrrcDkwAzs7NJ7dXPBaYA69y9FWg1s83A8cBpwNfS910F3JhNqHg8Rm1tddZPopDi8bLIZMkU1VzF1N/nH9XXLKq5oLDZ3norRjz+zvEgHW0t6SNl8qOjreWgx+9q1KiRtLe309j4GnfffSePPLKC0aPH0NzcdOB+8XgZ8XgZF110CTfffCM///l/MWpUPeXl5VRUlHPNNdcxb95cOjo6GDZsGP/2b7cwbNgw2tvbue++b3HllYXbr43Fsu/JXove3XcDmFkNqcJfQGoIZ5m7bzCz+cBNwEtA5ju0CxgBDM9Y3rmsT4lEMjKz+UV1ZsHMXLn8PmsI+vu+DIb3MmoKmS2ZTB40y+O25g6y/SnJfMwQWV5ewYoV3+n2tkSig8cee/rA5U2bNjJz5uVMnPgB1q//DW+/vZVEooPJk09i8uST3pWt83ELOYtlMvnunuypC/r8MtbMxgJPAPe6+3fMrNbdm9I3PwF8C/gFkLmFGqAJ2JmxvHOZiMigcvjhR3L77V8hHo/T0dHR469GRVVfX8a+F1gNXOXuP0sv/omZfdHdnwc+BmwAngduM7OhpPb4JwKbgHXAmenbpwFrCvIsREQKaPz4o7j//hWljpGzvvbobwDqgBvNrHN8/V+Ab5pZG/AmcJm77zSzJaSKvAyY7+77zGwp8JCZrQXagAsK8ixEZFBKJpN5/9m8Q0F/fzCqrzH6OXR/lMyp3azbADR0WbYXOL9fiUTkkFBeXsmePTsZNmy4yr4fkskke/bspLy8Muv76IQpiY79+/o1930uX0J33kcTXpVeXV09O3ZsZffu/n91lzqEMXo/gwrFyVZeXkldXX326xcwi0j/VAwtzm/T0veEV1J48Xg5o0YdntN9D9UjlXKlSc1ERAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcOW93WhmFcCDwHhgCHAr8HtgJZAENgFXunuHmc0CLgfagVvd/RkzqwIeAUYDu4CL3H1rYZ6KiIh0p689+s8D29z9dGAacA+wGFiQXhYDzjazMcDVwEeATwK3m9kQYDawMb3uw8CCwjwNERHpSV9F/33gxozr7cAk4Ln09VXAx4EpwDp3b3X3ZmAzcDxwGvDjLuuK
iEgR9Tp04+67AcysBniM1B75ne6eTK+yCxgBDAeaM+7a3fLOZX2Kx2PU1lZn+RQKKx4vi0yWTFHNNZhE5fWL8nsZ1WxRzQXRzNZr0QOY2VjgCeBed/+OmX0t4+YaoAnYmb7c2/LOZX1KJJI0Ne3NZtWCq62tjkyWTJm56utr+lhbuhOV9zWqnzGIbrao5oLSZuupC3odujGz9wKrgevd/cH04hfNbGr68jRgDfA8cLqZDTWzEcBEUl/UrgPO7LKuiIgUUV979DcAdcCNZtY5Vj8HWGJmlcDLwGPunjCzJaSKvAyY7+77zGwp8JCZrQXagAsK8ixERKRHfY3RzyFV7F19tJt1G4CGLsv2AucPJKCIiAyMTpgSEQmcil5EJHAqehGRwKnoRUQCp6IXEQmcil5EJHAqehGRwKnoRUQCp6IXEQmcil5EJHAqehGRwKnoRUQCp6IXEQmcil5EJHAqehGRwKnoRUQCp6IXEQmcil5EJHAqehGRwKnoRUQCp6IXEQmcil5EJHAqehGRwKnoRUQCp6IXEQmcil5EJHAqehGRwKnoRUQCp6IXEQmcil5EJHDl2axkZicBd7j7VDM7EXga+GP65qXu/qiZzQIuB9qBW939GTOrAh4BRgO7gIvcfWven4WIiPSoz6I3s3nAF4A96UUnAovd/a6MdcYAVwOTgaHAWjP7KTAb2OjuC83sc8ACYE5+n4KIiPQmmz36V4HzgG+nr08CzMzOJrVXPxeYAqxz91ag1cw2A8cDpwFfS99vFXBjNqHi8Ri1tdVZP4lCisfLIpMlU1RzDSZRef2i/F5GNVtUc0E0s/VZ9O7+uJmNz1j0PLDM3TeY2XzgJuAloDljnV3ACGB4xvLOZX1KJJI0Ne3NZtWCq62tjkyWTJm56utrSpxmcIrK+xrVzxhEN1tUc0Fps/XUBbl8GfuEu2/ovAx8GNgJZG6hBmjqsrxzmYiIFFEuRf8TM5uSvvwxYAOpvfzTzWyomY0AJgKbgHXAmel1pwFrBphXRET6KaujbrqYDdxjZm3Am8Bl7r7TzJaQKvIyYL677zOzpcBDZrYWaAMuyFdwERHJTlZF7+6NwMnpyy8Ap3azTgPQ0GXZXuD8AacUEZGc6YQpEZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAlWezkpmdBNzh7lPN7BhgJZAENgFXunuHmc0CLgfagVvd/RkzqwIeAUYDu4CL3H1rAZ6HiIj0oM89ejObBywDhqYXLQYWuPvpQAw428zGAFcDHwE+CdxuZkOA2cDG9LoPAwvy/xRERKQ32QzdvAqcl3F9EvBc+vIq4OPAFGCdu7e6ezOwGTgeOA34cZd1RUSkiPocunH3x81sfMaimLsn05d3ASOA4UBzxjrdLe9c1qd4PEZtbXU2qxZcPF4WmSyZopprMInK6xfl9zKq2aKaC6KZLasx+i46Mi7XAE3AzvTl3pZ3LutTIpGkqWlvDtHyr7a2OjJZMmXmqq+v6WNt6U5U3teofsYgutmimgtKm62nLsil6F80s6nu/t/ANODnwPPAbWY2FBgCTCT1Re064Mz07dOANTlsb8BGjiinrLIq5/tnW6QdbS1sa27PeTtSXLn8A9nS2s7unS0FSCNSOLkU/bVAg5lVAi8Dj7l7wsyWkCryMmC+u+8zs6XAQ2a2FmgDLshX8P4oq6yChVmNGg1sOwubSY1QyWAw/svP9vs+jYums7sAWUQKKauid/dG4OT05VeAj3azTgPQ0GXZXuD8AacUEZGc6YQpEZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHA
qehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAKnohcRCZyKXkQkcCp6EZHAqehFRAJXnusdzexFoDl99XXgNmAlkAQ2AVe6e4eZzQIuB9qBW939mQElFhGRfsmp6M1sKIC7T81Y9hSwwN3/28zuA842s18BVwOTgaHAWjP7qbu3Dji5iIhkJdc9+hOAajNbnX6MG4BJwHPp21cBnwASwLp0sbea2WbgeGB9bw8ej8eora3OMVppFSt3PF42aF+jwS7fr3uU38uoZotqLohmtlyLfi9wJ7AMmECq2GPunkzfvgsYAQznneGdzOW9SiSSNDXtzTHau9XX1+TtsfqSz9y9qa2tPrCtYj4/yf97nPleRk1Us0U1F5Q2W09dkGvRvwJsThf7K2a2jdQefacaoAnYmb7cdbmIiBRJrkfdzADuAjCzI0jtua82s6np26cBa4DngdPNbKiZjQAmkvqiVkREiiTXPfrlwEozW0vqKJsZwNtAg5lVAi8Dj7l7wsyWkCr9MmC+u+/LQ24REclSTkXv7m3ABd3c9NFu1m0AGnLZjkjB7N9H46LpOd21P9+JdLS1sK25PaftiORLzsfRiwxqFUNhYZ/HBQxY2cJmUscgiJSOzowVEQmcil5EJHAqehGRwGmMPs+KefKSTpQSkWyo6PNs/JefLfo2cz16REQODRq6EREJnIpeRCRwKnoRkcCp6EVEAqeiFxEJnI66ESmwbA6DzWadltZ2du9syUckOcSo6EUKLF+H3DYums7uvDySHGo0dCMiEjgVvYhI4FT0IiKB0xj9ILX++tOor3tnPnVNgyAiPVHRD1L1dSOK8sMZACxsLs52QjSAX7LqTm9H5+jXrKQnKnqRQirSL1mBfs1KeqYxehGRwGmPXiQghfiNgsOGV+lErUFORS8SkEL8HoJO1Br8NHQjIhI4Fb2ISOBU9CIigdMYvUgo8nzMfqauX/LqmP3BRUUvEgodsy890NCNiEjgVPQiIoEr+NCNmZUB9wInAK3Ape6+udDbLYkCjpGKRE0hTs7KpF/Uyp9ijNGfAwx191PM7GTgLuDsImy3+Io4RqqJxqTUCnFyViadqJU/xSj604AfA7j7r81sciE3dtjwKqqG6DtmkcFu3/4E9bUVqR2obuT9L4r9+w5sK7S/JmLJZLKgGzCzZcDj7r4qff0N4Gh37+3YrK3AloIGExEJzzigvuvCYuz67gQy/+kt66PkoZugIiKSm2IcdbMOOBMgPUa/sQjbFBGRtGLs0T8B/K2Z/RKIAZcUYZsiIpJW8DF6EREpLZ0wJSISOBW9iEjgVPQiIoHTmUU9MLMK4CFgPJAAZrn7H0oaCjCzIcAK4GhSh65e6e5/LHGmk4A73H2qmR0DrASSwKZ0vo4oZEtfPxc4390vKFWmrrnM7EPAt0h9zlqBC939rQjkOg54gNRBFL8FvujuiVLk6potY9kF6VynRCGXmZ0IPA10/j+51N0fLVW2Ttqj79mZQLm7nwp8BbitxHk6zQJ2u/vJwBeBe0oZxszmAcuAztMXFwML3P10UgVRsukuumYzs7uB2ynx576b1+xuUmU1FfgBcH1Ecn0VuMHdPwJUA39filw9ZCP9D+RMUp+zqOQ6EVjs7lPT/5W85EFF35tXgPL0pGzDgf0lztPpOGAVgLs7MLG0cXgVOC/j+iTgufTlVcDHi57oHV2z/RKYXaIsmbrm+py7v5S+XA7sK34k4N25Pu3uvzCzSmAMUJK/MtIOymZmI4FFwNySJUrp7vM/3cx+YWbLzaywM79lSUXfs92khm3+ADQAS0qa5h0vAX9nZrH0CWhHmlm8VGHc/XEO/kcw5u6dx+zuAoo0y9u7dc2W3rsq+fHE3eT6PwAzOxW4CvhGRHIlzGwc8DtgFOClyNU1W/rzvhy4hhL/+kk3
n//ngevc/QzgNeCmkgTrQkXfs2uAn7j7saSmWH7IzLqfXam4HiQ1Nv9z4CxgQynHTbuROR5fAzSVKshgYmb/ANwHTHf3raXO08ndt7j7BFLZFpc6T9okYAKwFPgucJyZfbO0kQ54wt03dF4GPlzKMJ1U9D3bAXTOBbwdqABKtuec4W+Atenx3CdI7TVEyYtmNjV9eRqwpoRZBgUz+zypPfmp7h6Z99PMnjKzCemruzj4H/GScffn3f0D6f8HPgf83t1LPYTT6SdmNiV9+WPAht5WLhYdddOzbwAPmtkaoJLUl1J7SpwJUt/m32JmXyK1tzyzxHm6uhZoSI/rvgw8VuI8kZYehlgCvAH8wMwAnnP3KPzJvwhYaWZtwF7g0hLnGQxmA/ekX7M3gctKnAfQFAgiIsHT0I2ISOBU9CIigVPRi4gETkUvIhI4Fb2ISOBU9CIigVPRi4gE7v8BIAOYF7cCZ8oAAAAASUVORK5CYII=\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(y_pred,label='prediction')\n",
    "plt.hist(y_train,label='target')\n",
    "plt.legend()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:52:51.915637100Z",
     "start_time": "2023-12-03T07:52:49.756723300Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 构建均方根误差"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [],
   "source": [
    "def rmse(y,y_pred):\n",
    "    error = y_pred-y # 预测值与真实值之间的差值\n",
    "    mse = (error**2).mean() # 计算mse\n",
    "    return np.sqrt(mse) # 取平方根得到rmse"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:57:30.507266300Z",
     "start_time": "2023-12-03T07:57:30.476262800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [
    {
     "data": {
      "text/plain": "0.7554192603920132"
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rmse(y_train,y_pred)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T07:59:19.682510Z",
     "start_time": "2023-12-03T07:59:19.641504300Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 验证模型(使用验证集和测试集)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "outputs": [
    {
     "data": {
      "text/plain": "nan"
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_num = df_val[base]\n",
    "df_num.fillna(0)\n",
    "X_val = df_num.values\n",
    "y_pred = w_0 +X_val.dot(w)\n",
    "rmse(y_val,y_pred)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T08:08:31.915311Z",
     "start_time": "2023-12-03T08:08:31.900273100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "outputs": [
    {
     "data": {
      "text/plain": "nan"
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_num = df_test[base]\n",
    "df_num.fillna(0)\n",
    "X_test = df_num.values\n",
    "y_pred = w_0 +X_test.dot(w)\n",
    "rmse(y_test,y_pred)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T08:08:32.949631300Z",
     "start_time": "2023-12-03T08:08:32.934577300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [],
   "source": [
    "def prepare_X(df):\n",
    "    df_num = df[base]\n",
    "    df_num = df_num.fillna(0)\n",
    "    X = df_num.values\n",
    "    return X"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T08:06:33.466449600Z",
     "start_time": "2023-12-03T08:06:33.433449Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "验证集: 0.761653099130156\n"
     ]
    }
   ],
   "source": [
    "X_train = prepare_X(df_train)\n",
    "w_0,w = train_linear_regression(X_train,y_train)\n",
    "\n",
    "X_val = prepare_X(df_val)\n",
    "y_pred = w_0+X_val.dot(w)\n",
    "print('验证集:',rmse(y_val,y_pred))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T08:08:14.512302900Z",
     "start_time": "2023-12-03T08:08:14.467292Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 简单的特征工程"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "outputs": [],
   "source": [
    "def prepare_X(df):\n",
    "    df = df.copy() # 创建输入参数的复本以防止负作用\n",
    "    features = base.copy() # 创建基本特征的副本\n",
    "    df['age'] = 2017-df.year # 计算age特性\n",
    "    features.append('age') # 在模型的特征名称列表中添加age\n",
    "\n",
    "    df_num = df[features]\n",
    "    df_num = df_num.fillna(0)\n",
    "    X = df_num.values\n",
    "    return X"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T08:15:54.914657100Z",
     "start_time": "2023-12-03T08:15:54.884658400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "验证集: 0.5172055461058299\n"
     ]
    }
   ],
   "source": [
    "X_train = prepare_X(df_train)\n",
    "w_0,w = train_linear_regression(X_train,y_train)\n",
    "X_val = prepare_X(df_val)\n",
    "y_pred = w_0+X_val.dot(w)\n",
    "print('验证集:',rmse(y_val,y_pred))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T08:19:03.412993500Z",
     "start_time": "2023-12-03T08:19:02.829458700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "outputs": [
    {
     "data": {
      "text/plain": "<matplotlib.legend.Legend at 0x1dd92fe3f70>"
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXUAAAD3CAYAAADi8sSvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAViElEQVR4nO3da5RcZZ3v8W9XdXfSnVs3pCOXiQlofCbqAkYcRBggL7xBxCiKc84MMhDIAIMEGbnJ5RgEJC4RFhkHhXAJsM4cL6ACwRyZ5SUT8JYRmAVOeBiQDh4dWTFJd9JJpy/VfV5UJXRip7u6u7oq9fT386pq7117/5+uql/vevbez67p7+9HkpSGTKULkCSVjqEuSQkx1CUpIYa6JCXEUJekhNRWcuN9fX39uVx1nH2TzdZQLbUWyzZVB9tUHcrZprq67B+BlsHmVTTUc7l+2tp2VrKEojU1NVZNrcWyTdXBNlWHcrappWXaxv3Ns/tFkhJiqEtSQgx1SUpIRfvUJaUvl+tl69ZN9PZ275n2+us1pDZEyXi0qba2nubmFrLZ4qPaUJc0rrZu3cTkyY1MmXIINTU1AGSzGXK5vgpXVlqlblN/fz87dmxj69ZNzJx5aNGvs/tF0rjq7e1mypTpewJdxampqWHKlOl7/cIphqEuadwZ6KMzmr+b3S+Symrq9AYaJpUuejq7eunY1lmy9VU7Q11SWTVMqmXu1U+UbH2tyxfSUbK17d/nP/85Fi36ON3d3bz++h9YtOiMQZd79NHvsHDhR3j11Vd46ql/49xzl5ShujcY6qoqB8+oJVPfMOQyLS3TSr7dvu5ONrf3lny9qj7HH3/CkPMfeuh+PvShhcybF5g3L5SpqjcY6qoqmfoGWDaj/Ntd1g5sL/t2VRrf//7jrFu3lp07d9DW1sa5557PvffexezZc6irq+OKK65h+fIv0N7eDsBnPnMFb3nLW3nkkW+xevX3OPjgmWzdunXPujZubOWiiy5h1ap7WLduLblcjjPO+ASZTIYtWzazbNk1nHnm/+TRRx/hhhtu4ckn1/Ctb/0f6urqmD37zVx55bU8+eQafvazp+nq2sXvfvf/+Nu//TtOO+30MbfVUJc0IXR27uT22/+ZtratLFnyd/T19XHOOefxtrf9OXfeuYJjjz2Oj33sE/z2t6/xxS/ewJe/fAff/vY3ePDBb5DJZDjvvLP2Wt9LL73IL37xU+6+exU9PT3cffc/c8kl/8iqVfeybNkX+fWvnwegvb2Ne++9i/vv/980Nk5hxYqv8Oijj9DQ0MiOHR3cdttX+e1vX+Oqqy4z1CWpWMcc8y4ymQwHHXQw06ZNZ+PGV3nzm+cC8JvfvMwzz/w7P/zhkwBs376djRtbOeKII6mvrwdg/vx37LW+117byPz57yCbzZLNZrnssisGPU/997//HUcccSSNjVMAOProd7F+/c95+9vfyVvf+jYAZs16E93dIzt1cX88pVHShBDjiwBs2bKZHTt20Nx80J5TBufMmcsnP/k3fPWrd3Pjjcv5wAc+xGGHHU5r62/o6tpFLpfjpZfiXuubM2cuL70U6evro7e3l6VLL6K7u5uamsxeV5YeeujhtLa+Smdn/gyd5557htmz3wyMz6me7qlLKqvOrl5aly8s6fqKsWXLZi699CI6Ojr47Gev4tZbb9kz7+yzF7N8+Y089th32LlzB4sX/z3Nzc2cf/6FXHjhYpqammlo2PsA/bx5gfe8571cdNF59PX18fGPn0l9fT1HH30Ml1++lMWL/x6ApqYmFi++gKVLL6CmJsOf/dlsLrzw03t+FZRaTSXHX+jpyfVXy5jKjv98YGhpmVaRA6Usa2fTpsocKK3G92mgP/xhI4ccMmevaeUeJmDgwc3xMl5tGuzv19Iy7VfAuwdb3u4XSUqI3S+SkleKs0qqhXvqkpQQQ12SEmKoS1JC7FOXVFbF
jN8zEo7LszdDXVJZlXr8nuHG5enq6uLJJ9dw+ukfLdk2x3O9Y2X3i6Skbdmymccf/17VrHes3FOXlLQHH7yP1tZXuf/+lWzY8J90d3exbVs755yzhJNPXsCnPvXJPaM1XnbZldxww7X09PQwe/YcnnlmPd/85vd49tlfcffdd5LNZjnssMO58spr91pvucdMH4qhLilpZ5+9mFdeeZl3vvMojj76L3jXu97N88//B/feexcnn7yAzs7OPaM1rljxFU46aQFnnHEm69f/nPXrf05/fz9f+tLNfO1r99DcfBArV36N73//8T3rPZACHQx1SRPEwQfP5IEH7uWJJx4FaujtfePg6u7RGltbWzn11A8DcNRRfwFAW9tWNm/+I9dffzWQ70s/7rjjy1r7SBjqkpKWHzWxj3vu+Tqnn/5R3vveE3niicdYs2b1gGXyoyUeeeRbeOGF55k3L+wZD33GjCZmzZrF8uW3MXXqVJ56ai0NDY171nugMdQllVVfd2fhjJXSrW8ozc3N9PT08uqrr3DHHbfy0EP3M2vWm2hra/uTZc866xxuvPF/8aMf/SszZ7ZQW1tLJpPh0ksv54orLqW/v5/Gxilcf/0NNDZOoaenlzvvXME//MPSkrVnrAx1SWW1ub2XbHZH2UZpnDRpEqtW/ct+5z/88ON7Hm/Y8ALnn38B8+e/g/Xrf8HmzX8E4Ljjjh+0y2Wo9VaKoS5JBYceeji33PIFstksfX19fOYzl1e6pBEbNtRDCHXAA8BcIAcsAXqBVUA/8AJwcYyxL4SwBLigMP+mGOPqwdYpSQeiuXOP4K677q90GWNSzMVHpwG1McYTgC8ANwO3AdfFGE8CaoBFIYRDgKXAicAHgVtCCJPGp2xJ1aSSN+OpZqP5uxXT/fISUBtCyADTgR7geGBtYf4a4APk9+KfjjF2AV0hhJeBo4D1+1txNltDU1PjiIuuhGw2UzW1FivFNo2nSv2tqv19am9voLNzO1OnztjrnpzZbHoXtJeyTf39/XR0bKOhoWFE738xod5BvuvlRWAm8GHg5Bjj7n8h24EZ5AN/4CHt3dP3K5frr5rbdFX7LcUGU41tammZVrFtV+pvVY3v00BTpx7E1q2b2LZt655pNTU1ye29j0ebamvraW5u+ZP3f6jvQTGhfhnwgxjj50IIs4EfAfUD5k8D2oBthcf7Tpc0gWWztcyceehe06r9H9VgDpQ2FfNbYStv7IFvAeqAZ0MICwrTTgXWAb8ETgohTA4hzADmkz+IKkkqk2L21G8H7gshrCO/h34N8O/AyhBCPbABeDjGmAshrCAf8Bng2hjjrnGqW5I0iGFDPcbYAXxykFmnDLLsSmBlCeqSJI1CeoefJWkCM9QlKSGGuiQlxFCXpIQY6pKUEENdkhJiqEtSQgx1SUqIoS5JCTHUJSkhhrokJcRQl6SEeONpqUgjvUFHZ1cvHds6x6kaaXCGulSkuVc/MaLlW5cvpGOcapH2x+4XSUqIoS5JCTHUJSkhhrokJcRQl6SEGOqSlBBDXZISYqhLUkK8+EgqRs8uWpcvHPHLRnoV6r76ujvJjWkNmmgMdakYdZNh2YyybzazrJ1cj7Gu4tn9IkkJMdQlKSGGuiQlxFCXpIQY6pKUEENdkhJiqEtSQgx1SUqIoS5JCTHUJSkhhrokJcRQl6SEGOqSlJCiRmkMIXwO+AhQD9wJrAVWAf3AC8DFMca+EMIS4AKgF7gpxrh6PIqWJA1u2D31EMIC4ATgROAUYDZwG3BdjPEkoAZYFEI4BFhaWO6DwC0hhEnjVLckaRDFdL98EHge+C7wOLAaOJb83jrAGuB9wHHA0zHGrhhjO/AycFTJK5Yk7Vcx3S8zgTnAh4EjgMeATIyxvzB/OzADmA60D3jd7un7lc3W0NTUONKaKyKbzVRNrcVKsU0pSvF9sk3jp5hQ3wy8GGPsBmIIYRf5LpjdpgFtwLbC432n71cu109b286RVVwhTU2NVVNr
saqxTWO9PVw1yuX6qu59Gk41fvaGU842DfU9KKb75SngQyGEmhDCYcAU4IeFvnaAU4F1wC+Bk0IIk0MIM4D55A+iSpLKZNg99Rjj6hDCyeRDOwNcDLwKrAwh1AMbgIdjjLkQwgryAZ8Bro0x7hq/0iVJ+yrqlMYY45WDTD5lkOVWAivHWpQkaXS8+EiSEmKoS1JCDHVJSoihLkkJMdQlKSGGuiQlxFCXpIQY6pKUEENdkhJiqEtSQgx1SUqIoS5JCTHUJSkhhrokJcRQl6SEGOqSlBBDXZISYqhLUkIMdUlKiKEuSQkx1CUpIYa6JCXEUJekhBjqkpQQQ12SEmKoS1JCDHVJSoihLkkJMdQlKSGGuiQlxFCXpITUVroASUOrq8vS0jJt2OU6u3rp2NZZhop0IDPUpQPc3KufKGq51uUL6RjnWnTgs/tFkhJiqEtSQgx1SUqIoS5JCTHUJSkhRZ39EkKYBfwKeD/QC6wC+oEXgItjjH0hhCXABYX5N8UYV49LxZKk/Rp2Tz2EUAfcBew+AfY24LoY40lADbAohHAIsBQ4EfggcEsIYdL4lCxJ2p9iul9uBb4O/L7w/FhgbeHxGuB9wHHA0zHGrhhjO/AycFSJa5UkDWPI7pcQwjnAphjjD0IInytMrokx9hcebwdmANOB9gEv3T19SNlsDU1NjSMuuhKy2UzV1FqsFNs00VXL+5niZ+9AadNwfeqLgf4QwvuAY4AHgVkD5k8D2oBthcf7Th9SLtdPW9vOERVcKU1NjVVTa7GqsU3FXC4/kVXL+1mNn73hlLNNQ30Phgz1GOPJux+HEH4CXAh8OYSwIMb4E+BU4MfAL4GbQwiTgUnAfPIHUaX9mjq9gYZJjlQhldJovlGfBVaGEOqBDcDDMcZcCGEFsI58P/21McZdJaxTCWqYVFv0uCa7tS5fOE7VSGkoOtRjjAsGPD1lkPkrgZUlqEmSNEpefCRJCTHUJSkhhrokJcRQl6SEGOqSlBBDXZISYqhLUkIMdUlKiKEuSQkx1CUpIYa6JCXEUJekhBjqkpQQQ12SEmKoS1JCDHVJSoihLkkJMdQlKSGGuiQlxFCXpIQUfePpanLwjFoy9Q0lX29Ly7Qh5/d1d7K5vbfk25WkYiUZ6pn6Blg2o/zbXdYObC/7diVpN7tfJCkhhrokJcRQl6SEGOqSlBBDXZISYqhLUkIMdUlKiKEuSQkx1CUpIYa6JCXEUJekhBjqkpQQQ12SEmKoS1JCDHVJSoihLkkJGfImGSGEOuA+YC4wCbgJ+E9gFdAPvABcHGPsCyEsAS4AeoGbYoyrx69sjYep0xtomJTkfVOkCWO4b/BZwOYY46dCCAcDzwLPAdfFGH8SQvg6sCiE8DNgKfBuYDLwVAjhX2OMXeNZvEqrYVItc69+omzba12+sGzbkiaK4UL928DDA573AscCawvP1wAfAHLA04UQ7wohvAwcBawfauXZbA1NTY2jqfuAVU3tyWYzVVWvhlct72eKn70DpU1DhnqMsQMghDCNfLhfB9waY+wvLLIdmAFMB9oHvHT39CHlcv20te0cRdlDG+4G0eNpPNozXpqaGveqt5J/N5VGtXz+9v3spaCcbRrquzrsgdIQwmzgx8BDMcZ/AfoGzJ4GtAHbCo/3nS5JKqPhDpS+CXgS+HSM8YeFyc+GEBbEGH8CnEo+8H8J3BxCmEz+gOp88gdRJY1Fz64RHXsoxa+tvu5ONrf3jnk9qozh+tSvAZqB60MI1xemXQqsCCHUAxuAh2OMuRDCCmAd+b3/a2OMu8araGnCqJsMy4btySypzLJ28j2oqkbD9alfSj7E93XKIMuuBFaWqC5J0ih4UrJGbf1Vf0VL89j2Ij2tUSotQ/0AVa4LgcbSB9vSPKPsXQMsax9+GWkCM9QPUOW+EAjca9Ybivln39nVS8e2zjJUo5Ew1CX9iWJ2KFqXL6SjDLVoZBzQS5ISYqhLUkIMdUlKiKEuSQkx1CUpIYa6
JCXEUJekhBjqkpQQQ12SEmKoS1JCDHVJSohjv5SY9/mUVEmGeomVamRFR0yUNBp2v0hSQgx1SUpI1Xa/lOvOQJJUTao2FYe6M5D90ZImKrtfJCkhhrokJcRQl6SEGOqSlBBDXZISUrVnv0iqrF09uTENizHS13Z29dKxrXPU25soDHVJozK5LluyYTGK0bp8IR1l21r1MtQl7a1nV9HXepTympC+7k4y9Q1DLjMeA+b1dXeyub235OutFENd0t7qJsOyGWXfbGZZe+W2y/ayb3e8eKBUkhJiqEtSQgx1SUqIoS5JCfFAqaQJb7Czaqr1vHhDvZRGcCpYMYpd16at7fzll54q2XaliWaw8+2r9bx4Q72UKnQqWMuy9rJvU9KBqaShHkLIAHcCRwNdwPkxxpdLuQ1JKqkhfmGP9GKnkSw/Xhc9lXpP/aPA5Bjje0MIxwNfARaVeBuSVDqVvNhqHC56KnWo/xXwfwFijD8PIby7xOvXYErYl++tAKXqVtPf31+ylYUQ7gEeiTGuKTx/DTgyxri/3xibgI0lK0CSJoY5QMtgM0q9p74NGNiplBki0GE/RUmSRqfUFx89DZwGUOhTf77E65ckDaHUe+rfBd4fQvgpUAOcW+L1S5KGUNI+dUlSZTn2iyQlxFCXpIQY6pKUEMd+GUYIoQ54AJgL5IAlMcYXK1rUGIUQJgH3A0eSPw314hjjf1W2qtELIbwH+FKMcUEI4a3AKqAfeIF82/oqWd9oDGxT4fnHgDNjjH9T0cLGYJ/36Rjgn8h/p7qAs2OMr1e0wFHYp01vB+4mf5LIfwCXxBhz5a7JPfXhnQbUxhhPAL4A3FzhekphCdARYzweuAT4aoXrGbUQwpXAPcDkwqTbgOtijCeR/3JV3TAV+7YphHAHcAtV/H0d5H26g3zoLQC+A1xVodJGbZA2fRG4JsZ4ItAIfKQSdVXth6SMXgJqC4OVTQd6KlxPKbwdWAMQY4zA/MqWMyavAGcMeH4ssLbweA3wvrJXNHb7tumnwEUVqqVU9m3T/4gxPld4XAvsKn9JY7Zvmz4eY/y3EEI9cAhQkV8ehvrwOsh3vbwIrARWVLSa0ngO+HAIoaZwkdjhIYRspYsajRjjI+z9j7Ymxrj7PN3tQPlHahqjfdsUY/wm+e6kqjVIm/4bIIRwAvBp4PYKlTZqg7QpF0KYA/wamAnEStRlqA/vMuAHMca3kR9S+IEQwuRhXnOgu498X/qPgdOBX1Wi72+cDOw/nwa0VaoQDS2E8NfA14GFMcZNla6nFGKMG2OM88i367ZK1GCoD28rsPsuFFuAOqAq92oH+EvgqUJ/5neB31S2nJJ6NoSwoPD4VGBdBWvRfoQQziK/h74gxpjE5y+E8FgIYV7h6Xb23sEoG89+Gd7twH0hhHVAPfkDITsqXNNY/RdwYwjhcvJ7sudVuJ5S+iywstCvuQF4uML1aB+Frr4VwGvAd0IIAGtjjJ+vaGFjtxxYFULoBnYC51eiCIcJkKSE2P0iSQkx1CUpIYa6JCXEUJekhBjqkpQQQ12SEmKoS1JC/j/aCDyJAqZk1wAAAABJRU5ErkJggg==\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(y_pred,label='prediction')\n",
    "plt.hist(y_val,label='target')\n",
    "plt.legend()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T08:20:19.305976500Z",
     "start_time": "2023-12-03T08:20:18.749978700Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 处理分类变量"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "outputs": [
    {
     "data": {
      "text/plain": "chevrolet        1123\nford              881\nvolkswagen        809\ntoyota            746\ndodge             626\nnissan            558\ngmc               515\nhonda             449\nmazda             423\ncadillac          397\nmercedes-benz     353\nsuzuki            351\nbmw               334\ninfiniti          330\naudi              328\nhyundai           303\nvolvo             281\nsubaru            256\nacura             252\nkia               231\nmitsubishi        213\nlexus             202\nbuick             196\nchrysler          187\npontiac           186\nlincoln           164\noldsmobile        150\nland_rover        143\nporsche           136\nsaab              111\naston_martin       93\nplymouth           82\nbentley            74\nferrari            69\nfiat               62\nscion              60\nmaserati           58\nlamborghini        52\nrolls-royce        31\nlotus              29\ntesla              18\nhummer             17\nmaybach            16\nmclaren             5\nalfa_romeo          5\nspyker              3\nbugatti             3\ngenesis             3\nName: make, dtype: int64"
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows per manufacturer; value_counts() lists the most frequent first.\n",
    "df['make'].value_counts()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T08:30:20.287633200Z",
     "start_time": "2023-12-03T08:30:20.282633300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "outputs": [],
   "source": [
    "def prepare_X(df):\n",
    "    \"\"\"Build the feature matrix for a frame of car listings.\n",
    "\n",
    "    Works on a copy of ``df`` so the caller's frame is not modified. The\n",
    "    selected columns are those named in the module-level ``base`` list,\n",
    "    followed by ``age`` (2017 minus ``year``) and 0/1 indicator columns for\n",
    "    a fixed set of door counts, makes and vehicle sizes. Missing values are\n",
    "    replaced with 0.\n",
    "\n",
    "    Returns the selected columns as a NumPy array (``DataFrame.values``).\n",
    "    \"\"\"\n",
    "    # (source column, prefix of the indicator column name, categories to encode)\n",
    "    one_hot_spec = [\n",
    "        ('number_of_doors', 'num_doors', [2, 3, 4]),\n",
    "        ('make', 'is_', ['chevrolet', 'ford', 'volkswagen', 'toyota', 'dodge']),\n",
    "        ('vehicle_size', 'is_', ['compact', 'midsize', 'large']),\n",
    "    ]\n",
    "\n",
    "    frame = df.copy()\n",
    "    columns = base.copy()  # copy so the shared base list is never mutated\n",
    "\n",
    "    frame['age'] = 2017 - frame['year']\n",
    "    columns.append('age')\n",
    "\n",
    "    for source, prefix, categories in one_hot_spec:\n",
    "        for category in categories:\n",
    "            name = '%s%s' % (prefix, category)\n",
    "            frame[name] = (frame[source] == category).astype(int)\n",
    "            columns.append(name)\n",
    "\n",
    "    return frame[columns].fillna(0).values"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:12:01.424238600Z",
     "start_time": "2023-12-03T09:12:01.394239800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "验证集 53.00343980375652\n"
     ]
    }
   ],
   "source": [
    "# Fit train_linear_regression on the prepare_X features and report the\n",
    "# validation RMSE.\n",
    "X_train = prepare_X(df_train)\n",
    "X_val = prepare_X(df_val)\n",
    "w_0, w = train_linear_regression(X_train, y_train)\n",
    "y_pred = X_val.dot(w) + w_0\n",
    "print('验证集', rmse(y_val, y_pred))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:12:05.884740400Z",
     "start_time": "2023-12-03T09:12:05.750305200Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 正则化"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "outputs": [],
   "source": [
    "def train_linear_regression_reg(X,y,r=0.0):\n",
    "    \"\"\"Fit linear regression with ridge regularization via the normal equations.\n",
    "\n",
    "    A column of ones is prepended to ``X`` and the system\n",
    "    ``(X^T X + r*I) w = X^T y`` is solved for ``w``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X : 2-D array, one row per sample and one column per feature.\n",
    "    y : array of targets, one per row of ``X``.\n",
    "    r : value added to every diagonal entry of ``X^T X``. The entry for the\n",
    "        bias column is included, so the bias is shrunk together with the\n",
    "        feature weights.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (w_0, w) : the bias term and the array of feature weights.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    numpy.linalg.LinAlgError\n",
    "        If the regularized matrix is singular.\n",
    "    \"\"\"\n",
    "    ones = np.ones(X.shape[0])\n",
    "    X = np.column_stack([ones, X])\n",
    "    XTX = X.T.dot(X) + r * np.eye(X.shape[1])\n",
    "    # Solve the linear system directly instead of forming the explicit inverse:\n",
    "    # it needs less work and is more accurate when XTX is ill-conditioned.\n",
    "    w = np.linalg.solve(XTX, X.T.dot(y))\n",
    "    return w[0], w[1:]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:07:49.134372600Z",
     "start_time": "2023-12-03T09:07:49.129340900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0,10.68,-0.01,-0.10\n",
      "0.001,10.68,-0.01,-0.10\n",
      "0.01,10.65,-0.01,-0.10\n",
      "0.1,10.42,-0.01,-0.10\n",
      "1,9.02,-0.00,-0.09\n",
      "10,6.89,0.00,-0.09\n"
     ]
    }
   ],
   "source": [
    "# Print r, the bias, and the weights at positions 2 and 5 for increasing r.\n",
    "# Note: w_0 and w are left holding the fit for the last r in the sequence.\n",
    "row_format = '{},{:.2f},{:.2f},{:.2f}'\n",
    "for r in (0, 0.001, 0.01, 0.1, 1, 10):\n",
    "    w_0, w = train_linear_regression_reg(X_train, y_train, r=r)\n",
    "    print(row_format.format(r, w_0, w[2], w[5]))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:07:52.622731400Z",
     "start_time": "2023-12-03T09:07:52.591700300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "验证集: 0.5060375464024449\n"
     ]
    }
   ],
   "source": [
    "# Refit with r=0.001 and report the validation RMSE.\n",
    "X_train = prepare_X(df_train)\n",
    "X_val = prepare_X(df_val)\n",
    "w_0, w = train_linear_regression_reg(X_train, y_train, r=0.001)\n",
    "y_pred = X_val.dot(w) + w_0\n",
    "print('验证集:', rmse(y_val, y_pred))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:26:26.284311500Z",
     "start_time": "2023-12-03T09:26:26.223317400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "测试集 0.5035870970503362\n"
     ]
    }
   ],
   "source": [
    "# Score the current weights (w_0, w) on the test split.\n",
    "X_test = prepare_X(df_test)\n",
    "y_pred = X_test.dot(w) + w_0\n",
    "print('测试集', rmse(y_test, y_pred))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:26:27.091552500Z",
     "start_time": "2023-12-03T09:26:27.074560400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "outputs": [
    {
     "data": {
      "text/plain": "(make                                toyota\n model                                venza\n year                                  2013\n engine_fuel_type          regular_unleaded\n engine_hp                              268\n engine_cylinders                         6\n transmission_type                automatic\n driven_wheels              all_wheel_drive\n number_of_doors                          4\n market_category      crossover,performance\n vehicle_size                       midsize\n vehicle_style                        wagon\n highway_mpg                             25\n city_mpg                                18\n popularity                            2031\n Name: 11270, dtype: object,\n 10.345638111452145)"
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Take the third test row as a sample listing, together with its target value.\n",
    "ad, price = df_test.iloc[2], y_test[2]\n",
    "ad, price"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:20:30.693605500Z",
     "start_time": "2023-12-03T09:20:30.684608400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "outputs": [
    {
     "data": {
      "text/plain": "         make  model  year  engine_fuel_type engine_hp engine_cylinders  \\\n11270  toyota  venza  2013  regular_unleaded       268                6   \n\n      transmission_type    driven_wheels number_of_doors  \\\n11270         automatic  all_wheel_drive               4   \n\n             market_category vehicle_size vehicle_style highway_mpg city_mpg  \\\n11270  crossover,performance      midsize         wagon          25       18   \n\n      popularity  \n11270       2031  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>make</th>\n      <th>model</th>\n      <th>year</th>\n      <th>engine_fuel_type</th>\n      <th>engine_hp</th>\n      <th>engine_cylinders</th>\n      <th>transmission_type</th>\n      <th>driven_wheels</th>\n      <th>number_of_doors</th>\n      <th>market_category</th>\n      <th>vehicle_size</th>\n      <th>vehicle_style</th>\n      <th>highway_mpg</th>\n      <th>city_mpg</th>\n      <th>popularity</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>11270</th>\n      <td>toyota</td>\n      <td>venza</td>\n      <td>2013</td>\n      <td>regular_unleaded</td>\n      <td>268</td>\n      <td>6</td>\n      <td>automatic</td>\n      <td>all_wheel_drive</td>\n      <td>4</td>\n      <td>crossover,performance</td>\n      <td>midsize</td>\n      <td>wagon</td>\n      <td>25</td>\n      <td>18</td>\n      <td>2031</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# prepare_X needs a DataFrame with one column per field. Wrapping the Series\n",
    "# in pd.DataFrame(...) gives a single-COLUMN frame (Series.T is a no-op), so\n",
    "# the previous code only produced a one-row frame when the cell was run twice.\n",
    "# A list indexer returns a one-row DataFrame on every run, fresh kernel included.\n",
    "ad = df_test.iloc[[2]]\n",
    "ad"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:29:18.173810900Z",
     "start_time": "2023-12-03T09:29:18.150773100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "outputs": [
    {
     "data": {
      "text/plain": "array([10.22692046])"
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Predict the target for the single sample listing with the current weights.\n",
    "ad_test = prepare_X(ad)\n",
    "y_pred = ad_test.dot(w) + w_0\n",
    "y_pred"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:30:53.150075800Z",
     "start_time": "2023-12-03T09:30:53.107083400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "outputs": [
    {
     "data": {
      "text/plain": "array([27636.26886661])"
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# expm1 computes exp(x) - 1, the inverse of log1p.\n",
    "# NOTE(review): assumes the targets were log1p-transformed upstream; confirm.\n",
    "np.expm1(y_pred)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:31:24.898613700Z",
     "start_time": "2023-12-03T09:31:24.837537200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "outputs": [
    {
     "data": {
      "text/plain": "make                                toyota\nmodel                                venza\nyear                                  2013\nengine_fuel_type          regular_unleaded\nengine_hp                              268\nengine_cylinders                         6\ntransmission_type                automatic\ndriven_wheels              all_wheel_drive\nnumber_of_doors                          4\nmarket_category      crossover,performance\nvehicle_size                       midsize\nvehicle_style                        wagon\nhighway_mpg                             25\ncity_mpg                                18\npopularity                            2031\nmsrp                                 31120\nName: 11270, dtype: object"
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 11270 is the index LABEL of the sample row (see its `Name`), so look it up\n",
    "# by label with .loc rather than by position with .iloc, and derive the label\n",
    "# from df_test instead of hard-coding it.\n",
    "df.loc[df_test.index[2]]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-03T09:32:19.289822600Z",
     "start_time": "2023-12-03T09:32:19.283826800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
